### Variance estimation for low income proportion: SOEP 2012 ###
### Variance estimation for quantiles of meta-distribution (and min/max) ###

### set seed

  set.seed(71101)

### Load packages ###

  library(foreign)
  library(parallel)
  library(foreach)
  library(doParallel)
  library(doRNG)
  
### Multicore setup ###
  
  cl <- makeForkCluster(3)
  registerDoParallel(cl)

### General setup ###  

  sampling.fraction <- 0.025
  scales <- seq(0.32,0.72,by=0.005) # Scale value
  beta <- 0.5 # Which quintile for poverty line
  alpha <- 0.6 # Which fraction of quintile for poverty line
  qx <- 0.5 # Which quantile of meta distribution?

### Read data ###

  dat1 <- read.dta("/home/christian/Dokumente/Daten/equivar/wv2012.dta",convert.factors=FALSE)
  dat1 <- dat1[,c("d1110612","d1110712","i1110212","l1110212","h1110112")] # Only keep variables of interest
  names(dat1) <- c("family.size","no.children","income","ow","no.child.14") # Rename variables
  N <- dim(dat1)[1] # Population size
  n <- round(sampling.fraction*N,digits=0) # Sample size

### True lips
  
  true.lips <- numeric(length(scales))
  
  for(i in 1:length(scales)) {
    # Calcualte equivalized income (approximated modified OECD-scale)
    dat1$equiv.weight <- dat1$family.size^scales[i]
    dat1[,paste("equiv.inc",i,sep=".")] <- dat1$income/dat1$equiv.weight
    # Poverty line 
    true.poverty.line <- alpha*quantile(dat1[,paste("equiv.inc",i,sep=".")],probs=beta) 
    # Poverty indicator 
    dat1$poor <- 0
    dat1$poor[dat1[,paste("equiv.inc",i,sep=".")]<=true.poverty.line] <- 1
    # Low income proportion 
    true.lips[i] <- mean(dat1$poor) 
  }
  
  true.lip.q <- quantile(true.lips,probs=qx)
  true.lip.min <- min(true.lips)
  true.lip.max <- max(true.lips)
  
### Simulations ###
  
  #for(i in 1:sims) {
  data.mc <- foreach(i = 1:sims, .packages = c("stats"),.combine = rbind) %dorng% {
    
    # Sample
    dat <- dat1[sample(1:N,size=n,rep=F),]
    
    # Vectors for temporary results
    lips <- numeric(length(scales))
    
    for(j in 1:length(scales)) {
      # Calcualte equivalized income (approximated modified OECD-scale)
      # Poverty line 
      poverty.line <- alpha*quantile(dat[,paste("equiv.inc",j,sep=".")],probs=beta) 
      # Poverty indicator 
      dat$poor <- 0
      dat$poor[dat[,paste("equiv.inc",j,sep=".")]<=poverty.line] <- 1
      # Low income proportion 
      lips[j] <- mean(dat$poor)  
    }
    
    # Sample estimates: Median, minimum, maximum
    lipq <- quantile(lips,probs=qx)  
    lipmin <- min(lips)  
    lipmax <- max(lips)  

    # Number of eta for which q(P), min(P), max(P)
    which.eta.q <- which.min(abs(lips-lipq))
    which.eta.min <- which.min(lips)
    which.eta.max <- which.max(lips)
    
    # Values of eta for which q(P), min(P), max(P)
    eta.q <- scales[which.eta.q]
    eta.min <- scales[which.eta.min]
    eta.max <- scales[which.eta.max]
    
    # I(F)
    dat$IF <- NA
    dat$IFmin <- NA
    dat$IFmax <- NA
    
    # I(F) for quantile
      # Calculate lip
      poverty.line <- alpha*quantile(dat[,paste("equiv.inc",which.eta.q,sep=".")],probs=beta) 
      dat$poor <- 0
      dat$poor[dat[,paste("equiv.inc",which.eta.q,sep=".")]<=poverty.line] <- 1
      lip <- mean(dat$poor)
      # Density estimates (bandwidth according to Berger/Skinner after Silverman)
      bandwidth <- 0.79 * (quantile(dat[,paste("equiv.inc",which.eta.q,sep=".")],probs=0.75)-quantile(dat[,paste("equiv.inc",which.eta.q,sep=".")],probs=0.25))* n^(-0.2)
      tmp <- density(dat[,paste("equiv.inc",which.eta.q,sep=".")],bw=bandwidth)
      fe1 <- tmp$y[which.min(abs(tmp$x-poverty.line))]
      fe2 <- tmp$y[which.min(abs(tmp$x-poverty.line/alpha))]
      # Variance via influence function full sample
      dat$below.median <- 0
      dat$below.median[dat[,paste("equiv.inc",which.eta.q,sep=".")]<=median(dat[,paste("equiv.inc",which.eta.q,sep=".")])] <- 1
      dat$z1 <- 1/N * (dat$poor - lip)
      dat$z2 <- -alpha * 1/N * (fe1/fe2) * (dat$below.median-beta)
      dat$IF <- dat$z1+dat$z2
    
    # I(F) for minimum
      # Calculate lip
      poverty.line <- alpha*quantile(dat[,paste("equiv.inc",which.eta.min,sep=".")],probs=beta) 
      dat$poor <- 0
      dat$poor[dat[,paste("equiv.inc",which.eta.min,sep=".")]<=poverty.line] <- 1
      lip <- mean(dat$poor)
      # Density estimates (bandwidth according to Berger/Skinner after Silverman)
      bandwidth <- 0.79 * (quantile(dat[,paste("equiv.inc",which.eta.min,sep=".")],probs=0.75)-quantile(dat[,paste("equiv.inc",which.eta.min,sep=".")],probs=0.25))* n^(-0.2)
      tmp <- density(dat[,paste("equiv.inc",which.eta.min,sep=".")],bw=bandwidth)
      fe1 <- tmp$y[which.min(abs(tmp$x-poverty.line))]
      fe2 <- tmp$y[which.min(abs(tmp$x-poverty.line/alpha))]
      # Variance via influence function full sample
      dat$below.median <- 0
      dat$below.median[dat[,paste("equiv.inc",which.eta.min,sep=".")]<=median(dat[,paste("equiv.inc",which.eta.min,sep=".")])] <- 1
      dat$z1 <- 1/N * (dat$poor - lip)
      dat$z2 <- -alpha * 1/N * (fe1/fe2) * (dat$below.median-beta)
      dat$IFmin <- dat$z1+dat$z2
    
    # I(F) for maximum
      # Calculate lip
      poverty.line <- alpha*quantile(dat[,paste("equiv.inc",which.eta.max,sep=".")],probs=beta) 
      dat$poor <- 0
      dat$poor[dat[,paste("equiv.inc",which.eta.max,sep=".")]<=poverty.line] <- 1
      lip <- mean(dat$poor)
      # Density estimates (bandwidth according to Berger/Skinner after Silverman)
      bandwidth <- 0.79 * (quantile(dat[,paste("equiv.inc",which.eta.max,sep=".")],probs=0.75)-quantile(dat[,paste("equiv.inc",which.eta.max,sep=".")],probs=0.25))* n^(-0.2)
      tmp <- density(dat[,paste("equiv.inc",which.eta.max,sep=".")],bw=bandwidth)
      fe1 <- tmp$y[which.min(abs(tmp$x-poverty.line))]
      fe2 <- tmp$y[which.min(abs(tmp$x-poverty.line/alpha))]
      # Variance via influence function full sample
      dat$below.median <- 0
      dat$below.median[dat[,paste("equiv.inc",which.eta.max,sep=".")]<=median(dat[,paste("equiv.inc",which.eta.max,sep=".")])] <- 1
      dat$z1 <- 1/N * (dat$poor - lip)
      dat$z2 <- -alpha * 1/N * (fe1/fe2) * (dat$below.median-beta)
      dat$IFmax <- dat$z1+dat$z2
    
    # Linearized Variable Full sample, quantile
    dat$z <- dat$IF
    s_var <- (var(dat$z)) * (N*(N-n))/n 
    SVARq.cov <- true.lip.q < (lipq+1.96*sqrt(s_var)) & true.lip.q > (lipq-1.96*sqrt(s_var))  
    # Min
    dat$zmin <- dat$IFmin
    s_var.min <- (var(dat$zmin)) * (N*(N-n))/n 
    SVARmin.cov <- true.lip.min < (lipmin+1.96*sqrt(s_var.min)) & true.lip.min > (lipmin-1.96*sqrt(s_var.min))  
    # Max
    dat$zmax <- dat$IFmax
    s_var.max <- (var(dat$zmax)) * (N*(N-n))/n 
    SVARmax.cov <- true.lip.max < (lipmax+1.96*sqrt(s_var.max)) & true.lip.max > (lipmax-1.96*sqrt(s_var.max))  
    # Simultaneous coverage (Bonferroni)
    SVARminmax.cov <- true.lip.min < (lipmin+2.241403*sqrt(s_var.min)) & true.lip.min > (lipmin-2.241403*sqrt(s_var.min)) & true.lip.max < (lipmax+2.241403*sqrt(s_var.max)) & true.lip.max > (lipmax-2.241403*sqrt(s_var.max))  
    
    # Concentate results
    c(lipq,
      lipmin,
      lipmax,
      s_var,
      s_var.min,
      s_var.max,
      SVARq.cov,
      SVARmin.cov,
      SVARmax.cov,
      SVARminmax.cov)
    
  }
  
### Stop MC ###
  
  stopCluster(cl)
  
### Results as vectors ###
  
  # Quantile
  LIPq <- data.mc[,1] 
  # Min
  LIPmin <- data.mc[,2] 
  # Max
  LIPmax <- data.mc[,3]
  # Variance Quantile  
  SVARq <- data.mc[,4]
  # Variance Minimum
  SVARmin <- data.mc[,5]
  # Variance Maximum
  SVARmax <- data.mc[,6]
  # Coverage CI quantile  
  SVARq.cov <- data.mc[,7]
  # Coverage CI min
  SVARmin.cov <- data.mc[,8]
  # Coverage CI max
  SVARmax.cov <- data.mc[,9]
  # MinMax
  SVARminmax.cov <- data.mc[,10]
  
### Assessment ###
  
  # True Variance/SD
  true.var <- sum((LIPq-true.lip.q)^2)/sims
  true.sd <- sqrt(true.var)
  
  true.var.min <- sum((LIPmin-true.lip.min)^2)/sims
  true.sd.min <- sqrt(true.var.min)
  
  true.var.max <- sum((LIPmax-true.lip.max)^2)/sims
  true.sd.max <- sqrt(true.var.max)
  
  # Relative Bias/Quantile
  (mean(SVARq)-true.var)/true.var
  (mean(sqrt(SVARq))-true.sd)/true.sd
  
  # Relative Bias/Minimum
  (mean(SVARmin)-true.var.min)/true.var.min
  (mean(sqrt(SVARmin))-true.sd.min)/true.sd.min
  
  # Relative Bias/Maximum
  (mean(SVARmax)-true.var.max)/true.var.max
  (mean(sqrt(SVARmax))-true.sd.max)/true.sd.max
  
  # Coverage probabilities/Quantile
  sum(SVARq.cov)/sims
  
  # Coverage probabilities/Minimum
  sum(SVARmin.cov)/sims
  
  # Coverage probabilities/Maximum
  sum(SVARmax.cov)/sims
  
  # Simultaneous coverage
  sum(SVARminmax.cov)/sims
  
  # Interval width
  true.width <- 2*1.96*true.sd
  true.width.min <- 2*1.96*true.sd.min
  true.width.max <- 2*1.96*true.sd.max
  SVAR.width <- 2*1.96*mean(sqrt(SVARq))
  SVAR.width.min <- 2*1.96*mean(sqrt(SVARmin))
  SVAR.width.max <- 2*1.96*mean(sqrt(SVARmax))
  
### Save results ###
  
  datei <- paste("/home/christian/Dokumente/Equiv Variance/Ergebnisse/sim3_",sims,"_",round(sampling.fraction*100),".rda",sep="")
  save(list=ls(),file=datei)
  